ggplot() +
geom_sf(data=srme) +
geom_sf(data=wrnf, fill="darkgrey") +
geom_sf(data=blocks, fill=NA, lwd=0.4) +
coord_sf(crs="EPSG:32613") +
theme_light(11)
Load the Sentinel-2 annual time-series (2019).
# Spectral Response
ts <- read_csv('../../data/tabular/mod/results/s2msi_l2a_aspen_TSy2019.csv',
show_col_types = FALSE) %>%
select(-c(id,.geo,TCB,TCG,TCW)) %>%
rename(NDVI705 = NDRE)
head(ts)
NA
Tidy the time-series data.
# Filter out cloud-contaminated pixels based on the Cloud Score + values
# Reference: https://medium.com/google-earth/all-clear-with-cloud-score-bd6ee2e2235e
tsp <- ts %>%
filter(
cs >= 0.8, # >= Cloud Score + contamination
) %>%
# add the month as a column
mutate(month = month(image_date, label=TRUE, abbr=TRUE)) %>%
# Filter null values
filter(complete.cases(.))
# Test using some visualizations
# NDVI705 scatter plot with trend line
tsp %>%
group_by(image_date) %>%
summarize(band = median(NDVI705)) %>%
ggplot(aes(x=image_date,y=band)) +
geom_point(size=0.8) +
geom_smooth(method="loess") +
labs(x="Image Date", y="NDRE") +
theme_bw(11)
# NDVI705 box plot
ggplot(data=tsp, aes(x=NDVI705, y=month)) +
geom_boxplot() +
coord_flip() +
labs(y="Image Date", x="NDRE") +
theme_bw(11)
rm(ts)
Now pivot the table longer:
tsp_ <- tsp %>%
select(c(starts_with("B"),image_date,
SLAVI,NDVI705,NDMI,ChlRE,IRECI,MCARI)) %>%
group_by(image_date) %>%
summarize_all(list(median)) %>%
pivot_longer(
cols = -c(image_date),
names_to="band",
values_to="reflectance"
) %>%
# add a month and week label
mutate(
month = month(image_date, label=TRUE, abbr=TRUE),
week = isoweek(image_date),
biweek = cut.Date(image_date, breaks="2 week", labels=FALSE)
)
head(tsp_, n=19)
rm(tsp)
gc()
used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
Ncells 6004919 320.7 13778540 735.9 NA 8988140 480.1
Vcells 47615836 363.3 145620122 1111.0 16384 182020047 1388.8
Breakpoint analysis to identify significant shift in spectral signature throughout the year based on NDRE:
# Isolate one of the vegetation indices (Normalized Difference Red-edge Index)
df <- tsp_ %>%
filter(band == "NDVI705") %>%
select(image_date,week,reflectance) %>%
group_by(image_date,week) %>%
summarize(reflectance = median(reflectance)) %>%
ungroup()
`summarise()` has grouped output by 'image_date'. You can override using the `.groups` argument.
# Create a weekly time-series object
df.ts <- ts(df$reflectance, frequency = 52)
# Implement the changepoint analysis based on NDRE
result <- changepoint::cpt.meanvar(df.ts, method="PELT")
bp.ind <- changepoint::cpts(result)
# Map breakpoints to dates
print("Breakpoint weeks:")
[1] "Breakpoint weeks:"
bp.dates <- df$week[bp.ind]
bp.dates
[1] 11 17 21 26 43 47
# Plot the result
plot(result)
rm(df, df.ts, result, bp.ind, bp.dates)
Perform the analysis by spatial block grid to get an idea of the variation in time-series breakpoints.
Plot of median daily observations:
# Reorder band factors
tsp_$band <- factor(tsp_$band,
levels = c("B2","B3","B4","B5","B6","B7","B8","B8A","B11","B12",
"ChlRE","IRECI","NDMI","NDVI705","SLAVI","MCARI"))
tsp_ %>%
filter(str_detect(band,"B")) %>%
ggplot(aes(x=image_date,y=reflectance ,color=factor(band))) +
geom_line(linewidth=0.4) +
scale_color_viridis_d(option="turbo") +
labs(x="Acquisition Date",y="Reflectance",color="S2 MSI Band") +
theme_bw(14) +
theme(axis.title.y = element_text(size=11,face="italic",
margin = unit(c(0,0.4,0,0), "cm")),
axis.title.x = element_text(size=11,face="italic",
margin = unit(c(0.4,0,0,0), "cm")),
axis.text = element_text(size=10),
legend.text = element_text(size=10),
legend.title = element_text(size=11))
Gather the seasonal composite start and end dates:
# Summer
(start_summer <- week(as.Date("2019-05-25")))
[1] 21
(end_summer <- week(as.Date("2019-08-13")))
[1] 33
# Autumn
(start_autumn <- week(as.Date("2019-09-02")))
[1] 35
(end_autumn <- week(as.Date("2019-11-14")))
[1] 46
Generate the weekly median reflectance.
# List for band colors
band_colors <- c(
"B2" = "#1E90FF", # Blue: Dodger Blue
"B3" = "#00FF7F", # Green: Spring Green
"B4" = "#FF0000", # Red: Pure Red
"B5" = "#FFA07A", # Red Edge: Light Salmon
"B6" = "#FF8C00", # NIR: Dark Orange
"B7" = "#D2691E", # NIR: Chocolate
"B8" = "#FFD700", # NIR: Gold
"B8A" = "#B8860B", # NIR: Dark Goldenrod
"B11" = "#008B8B", # SWIR: Dark Cyan (dark shade of teal)
"B12" = "#800080" # SWIR: Purple
)
# Group by week, calculate the mean, plot
f3a <- tsp_ %>%
filter(str_detect(band,"B")) %>%
group_by(band,week) %>%
summarize(refl = mean(reflectance)) %>%
ggplot(aes(x=week,y=refl,color=factor(band))) +
geom_line(linewidth=0.6) +
scale_color_manual(values = band_colors) +
labs(x="Acquisition Week",y="Reflectance",color=NULL) +
theme_bw(11) +
theme(axis.title.y = element_text(size=11,face="italic",
margin = unit(c(0,0.4,0,0), "cm")),
axis.title.x = element_text(size=11,face="italic",
margin = unit(c(0.4,0,0,0), "cm")),
axis.text = element_text(size=10),
legend.text = element_text(size=8),
legend.title = element_text(size=8),
legend.position = c(0.2,0.95),
legend.direction = "horizontal",
legend.background = element_rect(fill="white", color="black"),
plot.margin = unit(c(1.2,0.5,1.2,0.5), 'lines'),
text = element_text(family = "Arial")) +
annotate('rect', xmin=start_summer, xmax=end_summer, ymin=0, ymax=6000, alpha=.3, fill='gray') +
annotate('rect', xmin=start_autumn, xmax=end_autumn, ymin=0, ymax=6000, alpha=.3, fill='gray') +
geom_vline(xintercept=start_summer, linetype="dotted", lwd=0.8) +
geom_vline(xintercept=end_summer, linetype="dotted", lwd=0.8) +
geom_vline(xintercept=start_autumn, linetype="dotted", lwd=0.8) +
geom_vline(xintercept=end_autumn, linetype="dotted", lwd=0.8)
f3a
# All bands
# Calculate the maximum reflectance for each band
max_refl_band <- tsp_ %>%
group_by(band) %>%
summarize(max_refl = max(reflectance))
# Now create your plot
tsp_ %>%
group_by(band, week) %>%
summarize(refl = mean(reflectance), .groups = 'drop') %>%
ggplot(aes(x = week, y = refl, group = 1)) +
geom_line() +
geom_rect(data = max_refl_band, aes(xmin=start_summer, xmax=end_summer, ymin=0, ymax=max_refl), alpha=.3, fill='gray',
inherit.aes = FALSE) +
geom_rect(data = max_refl_band, aes(xmin=start_autumn, xmax=end_autumn, ymin=0, ymax=max_refl), alpha=.3, fill='gray',
inherit.aes = FALSE) +
geom_vline(xintercept = start_summer, linetype = "dotted", lwd = 0.8) +
geom_vline(xintercept = end_summer, linetype = "dotted", lwd = 0.8) +
geom_vline(xintercept = start_autumn, linetype = "dotted", lwd = 0.8) +
geom_vline(xintercept = end_autumn, linetype = "dotted", lwd = 0.8) +
facet_wrap(. ~ band, scales = "free_y") +
theme_minimal() +
theme(axis.title.y = element_text(size = 8, face = "italic",
margin = unit(c(0, 0.4, 0, 0), "cm")),
axis.title.x = element_text(size = 8, face = "italic",
margin = unit(c(0.4, 0, 0, 0), "cm")),
axis.text = element_text(size = 7),
strip.text.x = element_text(size = 8))
# Spectral indices
max_refl_band_vi <- max_refl_band %>%
filter(!str_detect(band,"B"))
f3b <- tsp_ %>%
filter(!str_detect(band,"B")) %>%
group_by(band,week) %>%
summarize(refl = mean(reflectance)) %>%
ggplot(aes(x=week, y=refl, group=1)) +
geom_line(linewidth=0.6) +
labs(x="Acquisition Week",y="") +
geom_rect(data = max_refl_band_vi, aes(xmin = start_summer, xmax = end_summer, ymin = 0, ymax = max_refl), alpha = .3, fill = 'gray',
inherit.aes = FALSE) +
geom_rect(data = max_refl_band_vi, aes(xmin = start_autumn, xmax = end_autumn, ymin = 0, ymax = max_refl), alpha = .3, fill = 'gray',
inherit.aes = FALSE) +
geom_vline(xintercept=start_summer, linetype="dotted", lwd=0.8) +
geom_vline(xintercept=end_summer, linetype="dotted", lwd=0.8) +
geom_vline(xintercept=start_autumn, linetype="dotted", lwd=0.8) +
geom_vline(xintercept=end_autumn, linetype="dotted", lwd=0.8) +
facet_wrap(. ~ band, scales = "free") +
theme_light(11) +
theme(axis.title.y = element_text(size=11,face="italic",
margin = unit(c(0,0.4,0,0), "cm")),
axis.title.x = element_text(size=11,face="italic",
margin = unit(c(0.4,0,0,0), "cm")),
axis.text = element_text(size=10),
strip.text.x = element_text(size = 11),
plot.margin = unit(c(1.2,0.5,1.2,0.5), 'lines'),
text = element_text(family = "Arial"))
f3b
rm(max_refl_band,max_refl_band_vi)
cols <- c(
"Summer" = "#87CEEB",
"Autumn" = "#DAA520"
)
seasonal <- tsp_ %>%
mutate(season = if_else(week>=22&week<=37,"Summer","na"),
season = if_else(week>=37&week<=48,"Autumn",season)) %>%
filter(season!="na")
seasonal$season <- factor(seasonal$season, levels = c("Summer","Autumn"))
f3c <- seasonal %>%
filter(str_detect(band,"B")) %>%
ggplot(aes(x=band,y=reflectance,fill=season)) +
geom_boxplot(outlier.size = 0.5, outlier.color = "grey30") +
scale_fill_manual(values = cols) +
theme_bw(12) +
labs(x="Sentinel-2 MSI Band",y="Reflectance",fill="Season: ") +
theme(axis.title.y = element_text(size=11,face="italic",
margin = unit(c(0,0.4,0,0), "cm")),
axis.title.x = element_text(size=11,face="italic",
margin = unit(c(0.4,0,0,0), "cm")),
axis.text = element_text(size=10),
legend.position = "none",
plot.margin = unit(c(1.2,0.5,1.2,0.5), 'lines'),
text = element_text(family = "Arial"))
f3c
f3d <- seasonal %>%
filter(!str_detect(band,"B")) %>%
ggplot(aes(y=reflectance)) +
geom_boxplot(aes(fill=season)) +
scale_fill_manual(values = cols) +
facet_wrap(. ~ band, scales = "free") +
theme_bw(12) +
labs(fill="Season",y="") +
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.title.y = element_text(size=11,face="italic",
margin = unit(c(0,0.4,0,0), "cm")),
axis.title.x = element_text(size=11,face="italic",
margin = unit(c(0.4,0,0,0), "cm")),
axis.text = element_text(size=10),
strip.text.x = element_text(size = 11),
# legend.position = c(0.85,0.2),
legend.position = "bottom",
legend.title = element_text(size=12),
legend.text = element_text(size=12),
legend.spacing.y = unit(1.5, "cm"),
legend.spacing.x = unit(0.5, 'cm'),
plot.margin = unit(c(1.2,0.5,1.2,0.5), 'lines'),
text = element_text(family = "Arial"))
f3d
Combine the plots:
# Arrange in a multi-panel plot
f3 <- ggarrange(f3a,f3b,f3c,f3d, nrow=2,ncol=2, widths=c(1, 0.75), labels = c("A", "B", "C", "D"))
Error in ggarrange(f3a, f3b, f3c, f3d, nrow = 2, ncol = 2, widths = c(1, :
object 'f3a' not found
Version 2:
f3_ <- ggarrange(f3a,ggarrange(f3c,f3d,nrow=1,ncol=2),
ncol=1,widths=c(1, 0.75),
labels = c("A", "B", "C"))
Warning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font databaseWarning: font family 'Arial' not found in PostScript font database
f3_
ggsave(f3_, file = "../../figures/Figure3_SpectralResponse_v2.png",
dpi = 300, bg="white") # adjust dpi accordingly
Saving 14 x 8 in image
Clean up the time-series / spectral response data:
rm(f3,f3a,f3b,f3c,f3d,seasonal,band_colors,cols,end_autumn,end_summer,start_autumn,start_summer,tsp_)
gc()
used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
Ncells 3530698 188.6 10685571 570.7 NA 10685571 570.7
Vcells 9107527 69.5 115377707 880.3 16384 180274361 1375.4
Load the accuracy assessment results for the best performing model (the final model in GEE). This represents the most parsimonious model (multicollinear bands removed and feature selection in rfUtilities). See the “accmeas.R” script.
accmeas <- read_csv('../../data/tabular/mod/results/best_model/southern_rockies_accmeas.csv',
show_col_types = FALSE)
opt_thresh <- read_csv('../../data/tabular/mod/results/best_model/southern_rockies_opt_thresh.csv',
show_col_types = FALSE)
glimpse(accmeas)
Rows: 1,000
Columns: 16
$ model <dbl> 995, 995, 995, 995, 995, 995, 995, 995, 995, 995, 995, 995, 995, 995, 995, 995, 995, 995, 995, 995…
$ prSize <dbl> 5702, 5702, 5702, 5702, 5702, 5702, 5702, 5702, 5702, 5702, 5702, 5702, 5702, 5702, 5702, 5702, 57…
$ bgSize <dbl> 31332, 31332, 31332, 31332, 31332, 31332, 31332, 31332, 31332, 31332, 31332, 31332, 31332, 31332, …
$ cutoff <dbl> 0.00000000, 0.01010101, 0.02020202, 0.03030303, 0.04040404, 0.05050505, 0.06060606, 0.07070707, 0.…
$ tp <dbl> 5702, 5699, 5693, 5678, 5655, 5636, 5628, 5623, 5615, 5607, 5597, 5586, 5579, 5567, 5555, 5545, 55…
$ tpr <dbl> 1.0000000, 0.9994739, 0.9984216, 0.9957910, 0.9917573, 0.9884251, 0.9870221, 0.9861452, 0.9847422,…
$ fn <dbl> 0, 3, 9, 24, 47, 66, 74, 79, 87, 95, 105, 116, 123, 135, 147, 157, 165, 177, 182, 197, 203, 217, 2…
$ tn <dbl> 0, 18718, 21721, 23814, 25884, 27044, 27731, 28193, 28521, 28794, 29045, 29269, 29450, 29600, 2972…
$ tnr <dbl> 0.0000000, 0.5974084, 0.6932529, 0.7600536, 0.8261203, 0.8631431, 0.8850696, 0.8998149, 0.9102834,…
$ fp <dbl> 31332, 12614, 9611, 7518, 5448, 4288, 3601, 3139, 2811, 2538, 2287, 2063, 1882, 1732, 1605, 1478, …
$ fpr <dbl> 1.00000000, 0.40259160, 0.30674710, 0.23994638, 0.17387974, 0.13685689, 0.11493042, 0.10018511, 0.…
$ accuracy <dbl> 0.1539666, 0.6593131, 0.7402387, 0.7963493, 0.8516228, 0.8824324, 0.9007669, 0.9131069, 0.9217476,…
$ precision <dbl> 0.1539666, 0.3111997, 0.3719942, 0.4302819, 0.5093218, 0.5679162, 0.6098169, 0.6417485, 0.6663897,…
$ recall <dbl> 1.0000000, 0.9994739, 0.9984216, 0.9957910, 0.9917573, 0.9884251, 0.9870221, 0.9861452, 0.9847422,…
$ f1 <dbl> 0.2668476, 0.4746200, 0.5420356, 0.6009101, 0.6730140, 0.7213618, 0.7538678, 0.7775166, 0.7948754,…
$ mcc <dbl> NA, 0.4308758, 0.5069640, 0.5696185, 0.6442674, 0.6939353, 0.7276642, 0.7523881, 0.7705284, 0.7862…
head(opt_thresh)
NA
scenarios <- read_csv("../../data/tabular/mod/results/scenarios/rf_accmets_scenarios.csv", show_col_types = FALSE)
# Read in the best performing model results to append
final_model <- opt_thresh %>%
mutate(Feature_Set = "Final_Model") %>%
rename(F1 = f1) %>%
select(Feature_Set,F1)
scenarios <- bind_rows(scenarios,final_model)
head(scenarios)
# Box plot version
ggplot(data=scenarios, aes(x=reorder(Feature_Set,F1),y=F1)) +
geom_boxplot(outlier.size=0.6, fill="lightblue") +
ylim(0,1) +
labs(x="Classification Scenario",y="F1-score", tag="A") +
theme_bw(10) +
theme(
plot.margin = unit(c(0.2,0.2,0.2,0.2), 'lines'),
axis.title.y = element_text(size=10,face="italic",
margin = unit(c(0,0.4,0,0), "cm")),
axis.title.x = element_text(size=10,face="italic",
margin = unit(c(0.4,0,0,0), "cm")),
axis.text = element_text(size=9),
strip.text.x = element_text(size = 10),
axis.text.x = element_text(angle = 45, hjust=1),
text = element_text(family = "Arial"))
# Calculating the means and standard errors for F1-scores
# Plot as a bar chart with standard errors
f3a <- scenarios %>%
group_by(Feature_Set) %>%
summarise(
Mean = mean(F1),
SE = sd(F1) / sqrt(n())
) %>%
# Plotting the bar chart with error bars
ggplot(aes(x=reorder(Feature_Set, Mean), y=Mean)) +
geom_bar(stat="identity", fill="lightblue", width=0.75) +
geom_errorbar(aes(ymin=Mean-SE, ymax=Mean+SE), width=0.2) +
ylim(0, 1) +
labs(x="Classification Scenario", y="F1-score") +
theme_bw(base_size = 10) +
theme(
plot.margin = unit(c(0.2,0.2,0.2,0.2), 'lines'),
axis.title.y = element_text(size=10, face="italic", margin = unit(c(0,0.4,0,0), "cm")),
axis.title.x = element_text(size=10, face="italic", margin = unit(c(0.4,0,0,0), "cm")),
axis.text = element_text(size=9),
strip.text.x = element_text(size = 10),
axis.text.x = element_text(angle = 45, hjust=1),
text = element_text(family = "Arial")
)
f3a
# Save out
ggsave(f3a, file = "../../figures/Figure4A_Class_Scenarios.png",
dpi = 300, bg="white") # adjust dpi accordingly
# Read in the best performing model results to append
final_model <- opt_thresh %>%
mutate(Feature_Set = "Final_Model") %>%
rename(F1 = f1) %>%
select(Feature_Set,F1)
# Add the final model to the data frame
scenarios_ <- bind_rows(scenarios,final_model)
# Create the final table
df <- scenarios_ %>%
group_by(Feature_Set) %>%
summarize(
F1_mn = mean(F1),
F1_sd = sd(F1)
) %>%
mutate(
n_features = if_else(Feature_Set == "Summer_S1" | Feature_Set == "Winter_S1", 5, 0),
n_features = if_else(Feature_Set == "Summer_S1_GLCM" | Feature_Set == "Winter_S1_GLCM", 13, n_features),
n_features = if_else(Feature_Set == "Summer_Winter_S1", 23, n_features),
n_features = if_else(Feature_Set == "Summer_S2" | Feature_Set == "Autumn_S2", 13, n_features),
n_features = if_else(Feature_Set == "Summer_S2_VI" | Feature_Set == "Autumn_S2_VI", 19, n_features),
n_features = if_else(Feature_Set == "Summer_Autumn_S2", 35, n_features),
n_features = if_else(Feature_Set == "Combined_S1_S2", 55, n_features),
n_features = if_else(Feature_Set == "Final_Model", 17, n_features)
)
# Create a tidy table
# Set font name for table
fontname <- "Times New Roman"
# Clean up extra rows and digits
cleaned <- df %>%
mutate_if(is.double, ~ round(., digits = 4)) %>%
# Sort from fastest to slowest
arrange(desc(F1_mn))
# Fix names and add units
names(cleaned) <- c("Feature Set","Mean F1-score","Standard Deviation","Number of Features")
# Create the flextable
ft <- flextable::flextable(cleaned) %>%
flextable::font(fontname = fontname, part = "all") %>%
flextable::autofit() %>%
flextable::fit_to_width(7.5)
ft
Feature Set | Mean F1-score | Standard Deviation | Number of Features |
|---|---|---|---|
Final_Model | 0.9321 | 0.0080 | 17 |
Combined_S1_S2 | 0.9143 | 0.0118 | 55 |
Summer_Autumn_S2 | 0.9091 | 0.0122 | 35 |
Summer_S2_VI | 0.8768 | 0.0155 | 19 |
Summer_S2 | 0.8725 | 0.0150 | 13 |
Autumn_S2_VI | 0.8416 | 0.0175 | 19 |
Autumn_S2 | 0.8326 | 0.0185 | 13 |
Summer_Winter_S1 | 0.6231 | 0.0303 | 23 |
Winter_S1_GLCM | 0.5930 | 0.0372 | 13 |
Summer_S1 | 0.5589 | 0.0241 | 5 |
Summer_S1_GLCM | 0.5472 | 0.0203 | 13 |
Winter_S1 | 0.4629 | 0.0274 | 5 |
# print(ft, preview = "docx")
rm(scenarios_,df,ft,fontname,cleaned)
This will go into the supplement.
The optimum cutoff value for classification is defined as the average cutoff across folds at which the F1-score is maximized. We can see the optimum cutoff value and how it corresponds with F1-score and the AUC-PR curve.
paste0("Optimum cutoff for classification: ",round((opt_cutoff <- mean(opt_thresh$cutoff_f1)),3))
[1] "Optimum cutoff for classification: 0.42"
paste0("Average F1-score: ",round((f1max <- max(opt_thresh$f1)),3))
[1] "Average F1-score: 0.942"
# AUC-PR Curve (approximation)
fs2a <- ggplot(data=accmeas) +
geom_line(aes(x=fpr,y=tpr,color=factor(model)),linewidth=0.4) +
scale_color_viridis_d(option="turbo") +
scale_x_continuous(limits=c(0,1)) + # Set x axis limits from 0 to 1
scale_y_continuous(limits=c(0,1)) +
labs(x='False Positive Rate', y='True Positive Rate', tag="A") +
coord_fixed(ratio = 1) + # to keep the x and y axes scales same
geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "black") + # diagonal
theme_light() +
theme(legend.position = "none",
axis.title.y = element_text(size=11,face="italic",
margin = unit(c(0,0.4,0,0), "cm")),
axis.title.x = element_text(size=11,face="italic",
margin = unit(c(0.4,0,0,0), "cm")),
axis.text = element_text(size=10),
strip.text.x = element_text(size = 11))
fs2b <- ggplot(data=accmeas) +
geom_line(aes(x=cutoff,y=f1,color=factor(model)),linewidth=0.4) +
scale_color_viridis_d(option="turbo") +
# geom_vline(xintercept=0.50, linetype="dashed", color="black") +
geom_point(aes(x=opt_cutoff, y=f1max),
color = "black", size = 3, shape = 19) +
scale_x_continuous(limits=c(0,1)) + # Set x axis limits from 0 to 1
scale_y_continuous(limits=c(0,1)) +
labs(x='Classification Threshold', y='F1 Score', tag="B") +
geom_text(aes(x=opt_cutoff, y=f1max),
label = paste0('Optimal threshold: ', round(opt_cutoff,3)),
nudge_x = 0.0, nudge_y = 0.05, size = 3.5) +
coord_fixed(ratio = 1) + # to keep the x and y axes scales same
theme_light() +
theme(legend.position = "none",
axis.title.y = element_text(size=11,face="italic",
margin = unit(c(0,0.4,0,0), "cm")),
axis.title.x = element_text(size=11,face="italic",
margin = unit(c(0.4,0,0,0), "cm")),
axis.text = element_text(size=10),
strip.text.x = element_text(size = 11))
# Arrange
(fs2 <- ggarrange(fs2a,fs2b))
# Save out
ggsave(fs2, file = "../../figures/FigureS2_Model_Performance.png",
dpi = 300, bg="white") # adjust dpi accordingly
Saving 6.5 x 3 in image
rm(fs2a,fs2b,fs2)
Using the optimum cutoff value, we can classify the test data and calculate the Precision, Recall, and F1-score.
testPart <- "../../data/tabular/mod/results/best_model/southern_rockies_test_probs_n55.csv"
# Assign the classification label based on the optimum cutoff
testData <- read_csv(testPart, show_col_types=FALSE) %>%
rename(gee_id = `system:index`,
TrueLabel = label) %>%
dplyr::select(-.geo) %>%
mutate(probability = probability*0.001, # scale back
ClassLabel = if_else(probability >= opt_cutoff, 1, 0)) %>%
group_by(seed) %>%
summarize(
tp = sum(TrueLabel == 1 & ClassLabel == 1),
tn = sum(TrueLabel == 0 & ClassLabel == 0),
fp = sum(TrueLabel == 0 & ClassLabel == 1),
fn = sum(TrueLabel == 1 & ClassLabel == 0),
accuracy = (tp + tn) / (tp + tn + fp + fn),
precision = tp / (tp + fp),
recall = tp / (tp + fn),
f1 = 2 * (precision * recall) / (precision + recall),
.groups = 'drop' # This ensures that the result is a single data frame, not a grouped one
)
head(testData,10)
# First, get the overall F1/MCC for the classification
print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
[1] "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
paste0("Average F1-score for the best performing model at the optimum cutoff: ",mean(testData$f1))
[1] "Average F1-score for the best performing model at the optimum cutoff: 0.931081536861107"
paste0("Range of F1-scores: ",range(testData$f1))
[1] "Range of F1-scores: 0.915951638065523" "Range of F1-scores: 0.940687361419069"
paste0("StDev of F1-scores: ",sd(testData$f1))
[1] "StDev of F1-scores: 0.00849272592798287"
summary(testData$f1)
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.9160 0.9245 0.9323 0.9311 0.9384 0.9407
# rm(testPart,testData)
Plot the model accuracy results for the optimum threshold.
Figure 4A: Model Accuracy (F1 and OA)
cols <- c("F1 Score"="#1f78b4","Overall Accuracy"="gray75")
testData <- testData %>%
mutate(Fold = as.numeric(factor(seed, levels = sort(unique(seed)))))
f4b <- ggplot(data=testData) +
geom_hline(yintercept=mean(testData$precision),linetype="dashed",color="gray85") +
geom_hline(yintercept=mean(testData$recall),linetype="dashed",color="gray45") +
geom_hline(yintercept=mean(testData$f1),linetype="dashed",color="#1f78b4") +
# Plot the Precision and Recall
geom_line(aes(x=factor(Fold),y=precision,group=1),color="gray65") +
geom_line(aes(x=factor(Fold),y=recall,group=1),color="gray35") +
# Plot the F1-score
geom_line(aes(x=factor(Fold),y=f1,group=1),color="#1f78b4") +
geom_point(aes(x=factor(Fold),y=f1,size=f1), color="#1f78b4") +
scale_size(range = c(3,6)) +
coord_cartesian(ylim = c(0.85, 1)) +
labs(x="Fold", y="Score", tag="B") +
# annotate("text", x = 2.6, y = 0.88, label = paste("Average F1 Score: ", round(mean(testData$f1), 2))) +
theme_bw() +
guides(size="none") +
theme(
plot.margin = unit(c(0.2,0.2,0.2,0.2), 'lines'),
axis.title.y = element_text(size=11,face="italic",
margin = unit(c(0,0.4,0,0), "cm")),
axis.title.x = element_text(size=11,face="italic",
margin = unit(c(0.4,0,0,0), "cm")),
axis.text = element_text(size=10),
strip.text.x = element_text(size = 11),
text = element_text(family = "Arial"))
f4b
ggsave(f4b, file = "../../figures/Figure4A_Best_Model_Acc_F1.png",
dpi = 300, bg="white") # adjust dpi accordingly
arr4 <- ggarrange(f4a,ggarrange(f4b,f5b, ncol=2, nrow=1, align='h'), nrow=2, align="h")
Warning: Removed 30 rows containing missing values (`geom_line()`).Warning: Removed 1000 rows containing missing values (`geom_text()`).
arr4
ggsave(arr4, file = "../../figures/Figure4_ModelSel_BestModel_ThreshOpt.png",
dpi = 300, bg="white") # adjust dpi accordingly
Saving 7.5 x 7.5 in image
# Feature Importance
ftr_imp <- read_csv('../../data/tabular/mod/results/best_model/southern_rockies_feature_imps_n55.csv',
show_col_types = FALSE)
glimpse(ftr_imp)
Rows: 10
Columns: 20
$ `system:index` <dbl> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9
$ B2_autumn <dbl> 49.49731, 53.55798, 46.15995, 44.59732, 36.44691, 52.99102, 49.53374, 53.95200, 50.03963, 45.…
$ B2_summer <dbl> 34.97310, 32.51703, 37.61583, 39.28662, 31.10338, 33.16864, 30.08473, 33.45479, 37.49951, 37.…
$ B3_autumn <dbl> 165.1867, 157.1493, 168.6397, 162.4648, 149.4557, 168.8254, 161.2280, 162.8126, 170.3982, 157…
$ B4_autumn <dbl> 89.04790, 102.29013, 99.24388, 96.15822, 88.75827, 93.63894, 89.72539, 104.08191, 98.86185, 1…
$ B4_summer <dbl> 72.03866, 84.54776, 67.78854, 75.62744, 65.89027, 80.50073, 75.79995, 77.82927, 76.71519, 77.…
$ B5_summer <dbl> 58.24769, 64.08761, 58.65716, 70.13235, 49.87718, 68.41857, 61.11606, 62.80698, 63.17702, 56.…
$ CIRE_autumn <dbl> 57.83576, 58.81169, 64.52829, 52.95184, 53.56839, 61.44669, 50.77975, 62.17084, 63.96603, 61.…
$ CIRE_summer <dbl> 188.3868, 190.1987, 159.6398, 185.3497, 194.2375, 198.4452, 181.9208, 211.1488, 163.8799, 195…
$ MNDWI_autumn <dbl> 356.2320, 337.3275, 330.8980, 316.3036, 316.3465, 353.9160, 324.2431, 353.9166, 337.8683, 316…
$ MNDWI_summer <dbl> 42.57642, 40.28980, 43.35349, 38.25414, 38.65055, 49.04463, 37.95904, 42.87194, 41.04543, 39.…
$ SLAVI_autumn <dbl> 29.03284, 31.82708, 35.25218, 30.28867, 26.53197, 35.65043, 25.52154, 31.37167, 30.51408, 30.…
$ SLAVI_summer <dbl> 151.3940, 149.6327, 132.2234, 135.1090, 159.6722, 168.1524, 162.8332, 171.8494, 144.0258, 151…
$ VH_summer_ent <dbl> 4.398385, 5.996275, 4.397207, 3.379485, 5.188208, 5.741732, 6.728794, 6.930599, 7.557105, 6.2…
$ VH_winter_ent <dbl> 4.592535, 6.320210, 5.227573, 5.173139, 5.419760, 6.514255, 8.699771, 10.378239, 6.127086, 7.…
$ VV_summer <dbl> 127.0105, 159.4653, 147.7406, 142.7680, 142.0232, 180.8220, 149.5889, 176.8713, 160.5565, 161…
$ VV_winter <dbl> 53.30679, 51.75154, 54.96236, 58.44277, 55.25150, 67.25097, 56.83036, 62.59225, 57.43706, 65.…
$ elevation <dbl> 282.0402, 276.5765, 268.8157, 251.2940, 272.3188, 301.4409, 266.4146, 280.5173, 305.6660, 272…
$ seed <dbl> 995, 822, 523, 102, 121, 433, 590, 151, 858, 506
$ .geo <chr> "{\"type\":\"MultiPoint\",\"coordinates\":[]}", "{\"type\":\"MultiPoint\",\"coordinates\":[]}…
# Tidy the data frame
df.imp <- ftr_imp %>%
select(-c(.geo,`system:index`)) %>%
rename(model = seed) %>%
mutate(model = as.factor(model))
# Pivot longer
df.imp.p <- df.imp %>%
pivot_longer(cols = -model) %>%
rename(importance = value,
band = name) %>%
mutate(season = if_else(str_detect(band,"_autumn"), "Autumn S2", "Summer S2"),
season = if_else(str_detect(band,"_winter"), "Winter S1", season),
season = if_else(str_detect(band,"VV_summer"), "Summer S1", season),
season = if_else(str_detect(band,"VH_summer"), "Summer S1", season))
glimpse(df.imp.p)
Rows: 170
Columns: 4
$ model <fct> 995, 995, 995, 995, 995, 995, 995, 995, 995, 995, 995, 995, 995, 995, 995, 995, 995, 822, 822, 82…
$ band <chr> "B2_autumn", "B2_summer", "B3_autumn", "B4_autumn", "B4_summer", "B5_summer", "CIRE_autumn", "CIR…
$ importance <dbl> 49.497311, 34.973098, 165.186734, 89.047904, 72.038658, 58.247693, 57.835765, 188.386846, 356.232…
$ season <chr> "Autumn S2", "Summer S2", "Autumn S2", "Autumn S2", "Summer S2", "Summer S2", "Autumn S2", "Summe…
# Grab the top 20 over all model runs
top <- df.imp.p %>%
group_by(band) %>%
summarize(median = median(importance)) %>%
ungroup()
top <- head(arrange(top,desc(median)), n = 10)
# Color palette
cols <- c(
"Summer S2" = "#87CEEB",
"Autumn S2" = "#DAA520",
"Winter S1" = "gray29",
"Summer S1" = "gray89"
)
# Boxplot
# %>% filter(band %in% top$band)
f6 <- ggplot(data=df.imp.p %>% filter(band %in% top$band),
aes(x=reorder(band,importance), fill=season)) +
geom_boxplot(aes(y=importance), position = position_dodge(0.5)) +
scale_fill_manual(values = cols,
labels = c("Autumn S2","Summer S1","Summer S2","Winter S1")) +
coord_flip() +
theme_bw(11) +
theme(axis.text.x = element_text(size=11),
axis.text.y = element_text(angle = 0, vjust=0, size=11),
axis.title = element_text(size=11,face="italic"),
legend.position = "bottom",
legend.justification = c(1.2,0.5), # Left-aligns the legends
legend.text = element_text(size=11),
text = element_text(family = "Arial"),
plot.margin=unit(c(0.5,0.5,0.5,0.5),"cm")) +
labs(x="Feature", y="Importance", fill="Season: ")
f6
ggsave(f6, file = "../../figures/Figure6_FeatureImportance_top10.png",
dpi=300, bg="white") # adjust dpi accordingly
Clean up!
rm(list = ls.str(mode = 'numeric'))
rm(accmeas,accmeas.best,accmeas.mn,accmeas.s,df.imp,
df.imp.p,f4,f4a,f4b,f6,ftr_imp,top,cols,arr4,cleaned,
df,f5b,final_model,ft,opt_thresh,scenarios,scenarios_,testData)
Warning: object 'accmeas.best' not foundWarning: object 'accmeas.mn' not foundWarning: object 'accmeas.s' not foundWarning: object 'f4' not foundWarning: object 'f4a' not foundWarning: object 'arr4' not foundWarning: object 'cleaned' not foundWarning: object 'df' not foundWarning: object 'f5b' not foundWarning: object 'ft' not foundWarning: object 'scenarios_' not found
gc()
used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
Ncells 3815449 203.8 5673305 303.0 NA 5673305 303.0
Vcells 43611970 332.8 82892856 632.5 16384 74919126 571.6
ref_acc <- read_csv('../../data/tabular/mod/results/best_model/southern_rockies_ref_accmeas.csv',
show_col_types = F)
head(ref_acc)
# Create a tidy table
df <- ref_acc %>%
filter(metric == "Predicted") %>%
select(reference,precision, recall, f1_score)
head(df)
# Create a tidy table
# Set font name for table
fontname <- "Times New Roman"
# Clean up extra rows and digits
cleaned <- df %>%
mutate_if(is.double, ~ round(., digits = 4)) %>%
# Sort from fastest to slowest
arrange(desc(f1_score))
# Fix names and add units
names(cleaned) <- c("Data Source","Precision","Recall","F1-score")
# Create the flextable
ft <- flextable::flextable(cleaned) %>%
flextable::font(fontname = fontname, part = "all") %>%
flextable::autofit() %>%
flextable::fit_to_width(7.5)
ft
Data Source | Precision | Recall | F1-score |
|---|---|---|---|
TrueLabel | 0.9516 | 0.9116 | 0.9311 |
treemap | 0.8087 | 0.9302 | 0.8652 |
lfevt | 0.8168 | 0.8995 | 0.8562 |
itsp | 0.7959 | 0.8669 | 0.8299 |
print(ft, preview = "docx")
rm(df,ref_acc)
aggr.sr <- aggr.sr %>%
mutate(source = if_else(source == "usfs_treemap16_balive_int_bin_srme_10m", "USFS TreeMap", source))
glimpse(aggr.sr)
Rows: 750
Columns: 10
$ block <dbl> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 2…
$ tp <dbl> 288, 0, 1410, 16, 90669, 0, 368, 396, 44319, 62550, 62944, 68, 111143, 9013, 0, 0, 0, 0, 34474, 487164,…
$ fp <dbl> 653, 2, 3864, 714, 477299, 225, 3639, 7111, 76792, 189814, 102800, 2669, 72030, 83822, 927, 231, 31, 66…
$ fn <dbl> 4790, 468, 7239, 1118, 75636, 0, 16144, 6810, 106960, 225459, 126158, 2334, 677624, 45413, 9, 0, 0, 0, …
$ blocksize <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
$ precision <dbl> 0.30605739, 0.00000000, 0.26734926, 0.02191781, 0.15963751, 0.00000000, 0.09183928, 0.05275077, 0.36593…
$ recall <dbl> 0.05671524, 0.00000000, 0.16302463, 0.01410935, 0.54519708, NA, 0.02228682, 0.05495420, 0.29296201, 0.2…
$ f1 <dbl> 0.09569696, NA, 0.20254256, 0.01716738, 0.24696264, NA, 0.03586919, 0.05382995, 0.32540842, 0.23150676,…
$ source <chr> "LANDFIRE EVT", "LANDFIRE EVT", "LANDFIRE EVT", "LANDFIRE EVT", "LANDFIRE EVT", "LANDFIRE EVT", "LANDFI…
$ region <chr> "Southern Rockies", "Southern Rockies", "Southern Rockies", "Southern Rockies", "Southern Rockies", "So…
Remove blocks with little to no aspen forest cover. First look at the distribution of aspen forest cover across spatial blocks.
# Plot the distribution of aspen area
blocks %>%
st_set_geometry(NULL) %>%
as_tibble() %>%
mutate(s2aspen_sum = as.integer(s2aspen_sum)) %>%
ggplot(aes(x=s2aspen_sum)) +
scale_x_continuous(trans="log") +
geom_histogram() +
theme_minimal(12)
# Filter out non-aspen blocks
blocks_aspen <- blocks %>%
filter(s2aspen_sum > 10)
flabs <- c(precision = "Precision", recall = "Recall", f1 = "F1-score")
# Reshape the agreement table for plotting facet wrap
# Southern Rockies (by block)
aggr.sr.m <- reshape2::melt(
aggr.sr, id.vars = c("blocksize", "source", "region", "block"),
measure.vars = c("precision", "recall", "f1"), variable.name = "statistic"
) %>%
na.omit() %>%
filter(blocksize == 1,
block %in% blocks_aspen$id)
# White River NF
aggr.wr.m <- reshape2::melt(
aggr.wr, id.vars = c("blocksize", "source", "region"),
measure.vars = c("precision", "recall", "f1"), variable.name = "statistic"
) %>%
na.omit() %>%
filter(blocksize == 1)
# Facet wrap plot of the statistics by block group
f7 <- ggplot(data=aggr.sr.m, aes(x=factor(blocksize), y=value, fill=factor(source))) +
geom_boxplot(position = position_dodge()) +
scale_fill_viridis_d(labels=c("LANDFIRE EVT","USFS TreeMap","USFS ITSP"), name="",
breaks=c("LANDFIRE EVT", "USFS TreeMap", "USFS ITSP")) +
facet_wrap(~statistic, labeller = labeller(statistic = flabs)) +
theme_bw(11) +
theme(legend.position = "bottom",
legend.box = "vertical",
legend.justification = c(0.5,0.5), # Left-aligns the legends
legend.spacing.y = unit(-5, "pt"), # Adjusts the gap between the legends
# legend.text = ggtext::element_markdown(halign = 0), # This aligns each individual legend's text to the left
axis.title.x=element_blank(), # Remove x axis title
axis.text.x=element_blank(), # Remove x axis text
axis.ticks.x=element_blank(), # Remove x axis ticks
axis.title = element_text(size=11,face="italic")) +
# text = element_text(family = "Times New Roman")) +
labs(x="Blocksize", y="Value")
# Add the WRNF statistics
f7 <- f7 +
geom_point(data=aggr.wr.m, aes(x=factor(blocksize), y=value, group=factor(source)),
position=position_dodge(0.75), shape=18, size=4, color="white") +
# Then add the actual point in black
geom_point(data=aggr.wr.m, aes(x=factor(blocksize), y=value, group=factor(source)),
position=position_dodge(0.75), shape=18, size=3, color="black") +
scale_shape_manual(name="Subregion", values=c("White River NF" = 8)) +
guides(
fill=guide_legend(override.aes=list(shape=NA)), # No shape in the fill legend
shape=guide_legend(title=" ", override.aes=list(color="black")) # Shape legend for Subregion
)
f7
# Save out
ggsave(f7, file = "../../figures/Figure7_Agreement_Box.png",
dpi = 300, bg="white") # adjust dpi accordingly
# Add a spatial map of statistics by blocks
blocks_ <- blocks %>%
mutate(id = as.numeric(id)-1) %>%
rename(block = id) %>%
left_join(aggr.sr, by="block") %>%
pivot_longer(cols = c(precision, recall, f1), names_to = "statistic", values_to = "value") %>%
na.omit()
f7b <- ggplot(data = blocks_) +
geom_sf(aes(fill = value)) +
geom_sf(data=srme, fill=NA, color="grey20", linewidth=0.4) +
facet_grid(source ~ statistic) +
scale_fill_viridis_c(option = "C") + # Use a continuous color scale
theme_void() +
theme(
strip.text.x = element_text(size = 10),
legend.position = "bottom"
) +
guides(fill = guide_colourbar(direction = "horizontal", barwidth = 10, barheight = 0.60,
ticks=F, title.position = "left"),
label.theme = element_text(angle = 0, size = 9)) +
labs(fill="", tag="B")
f7b
Join the two plots.
f7 <- f7 + theme(plot.margin = unit(c(0.5, 10, 0.5, 0.5),"mm")) # Add right margin to the left plot
f7b <- f7b + theme(plot.margin = unit(c(0.5, 0.5, 10, 0.5),"mm")) # Add left margin to the right plot
arr7 <- ggarrange(f7, f7b, ncol=2, align="h", widths = c(1.5,1))
Warning: Graphs cannot be horizontally aligned unless the axis parameter is set. Placing graphs unaligned.
arr7
# Save out
ggsave(arr7, file = "../../figures/Figure7_Agreement_Box_Maps_Arr.png",
dpi = 300, bg="white") # adjust dpi accordingly
Saving 7.29 x 4.51 in image
brewer.pal(n=5,"Set1")
cols <- c("#4DAF4A", "#984EA3", "#FF7F00")
glimpse(ref.global)
# # Get an average f1 score table
# f1mn <- ref.global %>%
# group_by
# Add a statistics column for legend (could do this for F1 too ...)
ref.global.m <- reshape2::melt(
ref.global, id.vars = c("blocksize", "source", "region"),
measure.vars = c("prec", "rec"), variable.name = "statistic"
)
glimpse(ref.global.m)
f7a <- ggplot(data=ref.global.m) +
geom_line(aes(x=blocksize,y=value,color=factor(source), linetype=statistic), linewidth=0.6) +
geom_point(data=ref.global, aes(x=blocksize, y=f1, color=source)) +
scale_x_continuous(breaks=c(1,3,5,7,9)) +
scale_linetype_manual(values = c("prec" = "dotted", "rec" = "solid"), name="Statistic: ",
labels=c("Precision","Recall")) +
facet_wrap(~region, scales = "fixed") +
scale_color_manual(values=cols, labels=c("LANDFIRE EVT","USFS TreeMap","USFS ITSP"), name="Source: ") +
theme_light(14) +
ylim(0,1) +
theme(legend.position = "top",
legend.box = "vertical",
legend.justification = c(0.5,0.5), # Left-aligns the legends
legend.spacing.y = unit(-10, "pt"), # Adjusts the gap between the legends
legend.text = ggtext::element_markdown(halign = 0), # This aligns each individual legend's text to the left
axis.title = element_text(size=12,face="italic"),
text = element_text(family = "Arial")) +
labs(x="Blocksize (pixels)",y="Value")
f7a
ggsave(f7a, file = "../../figures/Figure7_Global_PrecRec_Agreement.png", dpi=300)
Clean up!
rm(ref.global,ref.global.m,f7a,f7b,f7,ref.focal)
gc()
class.sr_ <- class.sr %>%
mutate(source = if_else(source=="lc16_evt_200_bin_srme_10m", "LANDFIRE EVT", source),
source = if_else(source=="s2aspen_prob_10m_binOpt_srme", "Sentinel-based Map", source),
source = if_else(source=="usfs_itsp_aspen_ba_gt10_srme_10m", "USFS ITSP", source),
source = if_else(source=="usfs_treemap16_balive_int_bin_srme_10m", "USFS TreeMap", source))
patch.sr_ <- patch.sr %>%
mutate(source = if_else(source=="lc16_evt_200_bin_srme_10m", "LANDFIRE EVT", source),
source = if_else(source=="s2aspen_prob_10m_binOpt_srme", "Sentinel-based Map", source),
source = if_else(source=="usfs_itsp_aspen_ba_gt10_srme_10m", "USFS ITSP", source),
source = if_else(source=="usfs_treemap16_balive_int_bin_srme_10m", "USFS TreeMap", source),
area_qt = factor(ntile(area, 4), labels = c("Q1", "Q2", "Q3", "Q4")))
glimpse(patch.sr_)
Rows: 4,023,649
Columns: 9
$ ...1 <dbl> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, …
$ area <dbl> 0.27, 0.18, 0.18, 0.18, 0.18, 0.09, 0.09, 0.09, 0.36, 0.27, 0.09, 0.18, 0.36, 0.18, 0.45, 2.07, 0.…
$ perimeter <dbl> 240, 240, 240, 240, 240, 120, 120, 120, 360, 240, 120, 180, 360, 180, 360, 1440, 360, 180, 300, 18…
$ perimeter_area_ratio <dbl> 888.8889, 1333.3333, 1333.3333, 1333.3333, 1333.3333, 1333.3333, 1333.3333, 1333.3333, 1000.0000, …
$ shape_index <dbl> 1.090909, 1.333333, 1.333333, 1.333333, 1.333333, 1.000000, 1.000000, 1.000000, 1.500000, 1.090909…
$ region <chr> "srme", "srme", "srme", "srme", "srme", "srme", "srme", "srme", "srme", "srme", "srme", "srme", "s…
$ source <chr> "LANDFIRE EVT", "LANDFIRE EVT", "LANDFIRE EVT", "LANDFIRE EVT", "LANDFIRE EVT", "LANDFIRE EVT", "L…
$ block_id <dbl> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,…
$ area_qt <fct> Q3, Q3, Q3, Q3, Q3, Q2, Q2, Q2, Q4, Q3, Q2, Q3, Q4, Q3, Q4, Q4, Q3, Q3, Q3, Q3, Q3, Q2, Q3, Q2, Q3…
glimpse(class.sr_)
Rows: 339
Columns: 8
$ ...1 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ total_area <dbl> 1663.05, 1512.79, 2880.09, 1891.05, 7887.67, 12885.39, 11173.34, 9641.01, 11779.70, 7447.66, 9000.44, 2711.0…
$ prop_area <dbl> 4.400219, 3.822703, 7.054768, 4.771215, 17.424149, 24.233280, 21.774401, 19.439621, 22.823152, 15.820654, 18…
$ n_patch <dbl> 2729, 2093, 4281, 2614, 9527, 11541, 11707, 8969, 10877, 7553, 15565, 6617, 6633, 4600, 3055, 5846, 3477, 40…
$ patch_den <dbl> 7.220587, 5.288849, 10.486291, 6.595254, 21.045488, 21.704914, 22.814388, 18.084616, 21.074172, 16.044422, 3…
$ region <chr> "srme", "srme", "srme", "srme", "srme", "srme", "srme", "srme", "srme", "srme", "srme", "srme", "srme", "srm…
$ source <chr> "LANDFIRE EVT", "LANDFIRE EVT", "LANDFIRE EVT", "LANDFIRE EVT", "LANDFIRE EVT", "LANDFIRE EVT", "LANDFIRE EV…
$ block_id <dbl> 4, 8, 9, 10, 12, 20, 21, 22, 23, 24, 26, 27, 28, 29, 30, 37, 38, 42, 43, 45, 46, 47, 48, 49, 51, 55, 56, 57,…
class.wr_ <- class.wr %>%
mutate(region = "wrnf") %>%
mutate(source = if_else(source=="lc16_evt_200_bin_wrnf_10m", "LANDFIRE EVT", source),
source = if_else(source=="s2aspen_prob_10m_binOpt_wrnf", "Sentinel-based Map", source),
source = if_else(source=="usfs_itsp_aspen_ba_gt10_wrnf_10m", "USFS ITSP", source),
source = if_else(source=="usfs_treemap16_balive_int_bin_wrnf_10m", "USFS TreeMap", source))
patch.wr_ <- patch.wr %>%
mutate(region = "wrnf") %>%
mutate(source = if_else(source=="lc16_evt_200_bin_wrnf_10m", "LANDFIRE EVT", source),
source = if_else(source=="s2aspen_prob_10m_binOpt_wrnf", "Sentinel-based Map", source),
source = if_else(source=="usfs_itsp_aspen_ba_gt10_wrnf_10m", "USFS ITSP", source),
source = if_else(source=="usfs_treemap16_balive_int_bin_wrnf_10m", "USFS TreeMap", source),
area_qt = factor(ntile(area, 4), labels = c("Q1", "Q2", "Q3", "Q4")))
rm(patch.sr,class.sr,patch.wr,class.wr)
Get some summary statistics:
class.sr_ %>%
group_by(source) %>%
summarize(
total_area_km2 = sum(total_area) * 0.01
) %>%
ggplot(aes(x=reorder(source, total_area_km2), y=total_area_km2)) +
geom_bar(stat="identity", fill="light green") +
theme_light(12)
df1 <- class.sr_ %>%
group_by(source) %>%
summarize(
total_area_km2 = sum(total_area) * 0.01,
n_patch_total = sum(n_patch),
patch_density_mn = mean(patch_den)
)
df2 <- patch.sr_ %>%
group_by(source) %>%
summarize(
patch_size_mn = mean(area),
per_ar_r_mn = mean(perimeter_area_ratio)
)
df <- left_join(df1,df2,by="source")
head(df)
rm(df1,df2)
# Make a tidy table
# Set font name for table
fontname <- "Times New Roman"
# Clean up extra rows and digits
cleaned <- df %>%
mutate_if(is.double, ~ round(., digits = 2)) %>%
# Sort from fastest to slowest
arrange(total_area_km2)
# Fix names and add units
names(cleaned) <- c("Data Source","Total Area (Km2)","Number of Patches","Patch Density",
"Average Patch Size","Average Perimeter/Area Ratio")
# Create the flextable
ft <- flextable::flextable(cleaned) %>%
flextable::font(fontname = fontname, part = "all") %>%
flextable::autofit() %>%
flextable::fit_to_width(7.5)
ft
Data Source | Total Area (Km2) | Number of Patches | Patch Density | Average Patch Size | Average Perimeter/Area Ratio |
|---|---|---|---|---|---|
Sentinel-based Map | 9,384.41 | 1,760,386 | 35.73 | 0.53 | 2,745.57 |
LANDFIRE EVT | 13,441.59 | 728,370 | 14.22 | 1.85 | 1,060.98 |
USFS TreeMap | 13,931.85 | 1,268,131 | 24.82 | 1.10 | 1,145.88 |
USFS ITSP | 20,477.69 | 266,762 | 4.82 | 7.68 | 941.75 |
print(ft, preview = "docx")
Plot the total area box plot across spatial blocks for the Southern Rockies:
# Combine the White River NF and SRME to plot patch statistics
df <- bind_rows(patch.sr_,patch.wr_)
# Plot facet wrap of patch size
df <- df %>%
group_by(source,region) %>%
summarize(
patch_size = mean(area),
per_ar_r = mean(perimeter_area_ratio)
)
`summarise()` has grouped output by 'source'. You can override using the `.groups` argument.
glimpse(df)
Rows: 8
Columns: 4
Groups: source [4]
$ source <chr> "LANDFIRE EVT", "LANDFIRE EVT", "Sentinel-based Map", "Sentinel-based Map", "USFS ITSP", "USFS ITSP", "USFS …
$ region <chr> "srme", "wrnf", "srme", "wrnf", "srme", "wrnf", "srme", "wrnf"
$ patch_size <dbl> 1.8454344, 3.4151061, 0.5330884, 0.7358793, 7.6763901, 14.8811627, 1.0986130, 2.2167857
$ per_ar_r <dbl> 1060.9830, 1046.2439, 2745.5691, 2872.1164, 941.7452, 944.5034, 1145.8826, 1125.3520
# Get the class metrics in the same format
df2 <- bind_rows(class.sr_,class.wr_) %>%
group_by(source,region) %>%
summarize(
patch_den = mean(patch_den)
)
`summarise()` has grouped output by 'source'. You can override using the `.groups` argument.
df_ <- left_join(df,df2,by=c("region","source"))
# Pivot longer
metric_labels <- c(patch_size = "Patch Size (ha)",
per_ar_r = "Perimeter-Area Ratio",
patch_den = "Patch Density")
df.long <- df_ %>%
pivot_longer(cols = c(contains("_")),
names_to = "metric",
values_to = "value") %>%
mutate(region = if_else(region=="srme","Southern Rockies","White River NF"),
source = if_else(source=="Sentinel-based Map","Sentinel-based",source),
source = factor(source, levels = rev(levels(reorder(source, value)))))
df.long$source <- factor(df.long$source,levels=c('Sentinel-based', 'LANDFIRE EVT', 'USFS TreeMap', 'USFS ITSP'))
cols <- c("#005a32","#a1d99b")
# Grouped bar chart
f7 <- ggplot(data=df.long, aes(x=source, y=value, fill=factor(region))) +
geom_bar(stat="identity", position=position_dodge()) +
scale_fill_manual(values=cols) +
facet_wrap(~metric, scales = "free_y", labeller = labeller(metric = metric_labels)) + # Faceting by summary variable
labs(x="", y="Value", fill="Region") + # Adding labels
theme_bw(10) +
theme(legend.position = "top") +
theme(axis.text.x = element_text(angle=45, hjust=1)) # Improving readability of x-axis labels
f7
ggsave(f7, file = "../../figures/Figure7_Landscape_Summaries.png", dpi=300)
Saving 6.5 x 3.5 in image
Calculate some grouped statistics (to compare with the class metrics results)
# Also create a grouped summary
(patch.summary <- patch_metrics %>%
group_by(source,region) %>%
summarize(area_md = median(area), # convert to hectares
area_mn = mean(area),
area_sum = sum(area),
perim_md = median(perimeter),
perim_mn = mean(perimeter),
par_md = median(perimeter_area_ratio),
par_mn = mean(perimeter_area_ratio),
si_md = median(shape_index),
si_mn = mean(shape_index)) %>%
ungroup())
(patch.long <- patch.summary %>%
pivot_longer(cols = c(contains("_")),
names_to = "metric",
values_to = "value"))
patch.long$source <- factor(patch.long$source,levels=c('Aspen10m', 'Aspen30m', 'LFEVT', 'TreeMap', 'ITSP'))
(ggplot(data = patch.long, aes(x=source, y=value, fill=region)) +
geom_bar(stat="identity", position="dodge", width=0.7) +
facet_wrap(~metric, scales="free") +
theme_minimal(14) +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
text = element_text(family = "Arial")))
(f8s <- ggplot(data = patch.long, aes(x=source, y=value, fill=region)) +
geom_bar(stat="identity", position="dodge", width=0.7) +
theme_minimal(14) +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
text = element_text(family = "Arial")))
Grab some summary statistics on the patch dynamics:
a <- patch.long %>% filter(region=="SRME",metric=="area_mn",source=="Aspen10m")
b <- patch.long %>% filter(region=="SRME",metric=="area_mn",source=="LFEVT")
print("Difference in mean patch size for the SRME: ")
((b$value-a$value)/b$value)*100
a <- patch.long %>% filter(region=="WRNF",metric=="area_mn",source=="Aspen10m")
b <- patch.long %>% filter(region=="WRNF",metric=="area_mn",source=="LFEVT")
print("Difference in mean patch size for the WRNF: ")
((b$value-a$value)/b$value)*100
print("~~~~~~~~~~~~~~~~")
print("Difference in mean patch size for the SRME: ")
((b$value-a$value)/b$value)*100
(c <- patch.long %>% filter(metric=="area_md",source=="Aspen10m"))
(d <- patch.long %>% filter(metric=="area_md",source=="LFEVT"))
print("Mean and median patch sizes: ")
paste0("Aspen10m mean: ",a$value)
paste0("LFEVT mean: ",b$value)
paste0("Aspen10m median: ",c$value)
paste0("LFEVT mean: ",d$value)
print("Difference in median area: ")
((d$value-c$value)/d$value)*100
print("Mean Perimeter area ratio: ")
(a <- patch.long %>% filter(metric=="par_mn",source=="Aspen10m"))
(b <- patch.long %>% filter(metric=="par_mn",source=="LFEVT"))
print("Difference in PAR: ")
((b$value-a$value)/b$value)*100
rm(a,b,c,d)
Boxplot as facet wrap for patch metrics:
brewer.pal(n=5,"Set1")
cols <- c("#005a32","#a1d99b")
(df <- patch_metrics %>%
# mutate(area_ha = area*100) %>%
select(-c(X,index,shape_index)) %>%
pivot_longer(contains(c("area","perimeter_area_ratio"))) %>%
arrange(source, desc(value)) %>%
mutate(name = as.character(name),
name = recode(name,
"area" = "Patch Size (ha)",
"perimeter_area_ratio" = "Perimeter/Area Ratio")))
df$source <- factor(df$source,
levels=c('Aspen10m', 'Aspen30m', 'LFEVT', 'TreeMap', 'ITSP'))
(f8a <- ggplot(data=df, aes(y = value, x = factor(source), fill = factor(region))) +
geom_boxplot(outlier.size = 0.2) +
scale_y_continuous(trans="log10",labels=label_number_si(accuracy = 1)) +
scale_fill_manual(values=cols) +
facet_wrap(~name, scales = "free_y", nrow=1) +
labs(x="\nData Source",y="Value",title="Patch Metrics", fill="Region: ") +
theme_light(12) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size=11),
axis.text.y = element_text(angle = 0, vjust=0.1, size=11),
axis.title.y = element_text(size=12, face="italic", vjust=1),
axis.title.x = element_text(size=12, face="italic", vjust=-1),
plot.margin = unit(c(0.5,0.5,0.5,0.5),"cm"),
legend.position="top",
legend.spacing.x = unit(0.5,"cm"),
text = element_text(family = "Arial")))
ggsave(f8a, file = "../../figures/Figure8A_patch_metrics.png", dpi = 300) # adjust dpi accordingly
Plot class metrics as facet wrap:
# Calculate area in square meters
srme_area_km2 <- st_area(srme) / 1e6
wrnf_area_km2 <- st_area(wrnf) / 1e6
(df <- class_metrics %>%
mutate(total_area = total_area*0.01,
prop_area = if_else(region=="SRME", as.double((total_area/srme_area_km2*100)), 0.0),
prop_area = if_else(region=="WRNF", as.double((total_area/wrnf_area_km2*100)), prop_area)) %>%
select(-c(X)) %>%
pivot_longer(cols = c(contains("_")),
names_to = "metric",
values_to = "value") %>%
mutate(metric = as.character(metric),
metric = recode(metric,
"n_patch" = "Number of Patches",
"patch_den" = "Patch Density",
"total_area" = "Total area (km2)",
"prop_area" = "Proportion of Area")))
df$source <- factor(df$source, levels=c('Aspen10m', 'Aspen30m', 'LFEVT', 'TreeMap', 'ITSP'))
head(df)
(f8b <- ggplot(data=df, aes(y=value, x=factor(source), fill=factor(region))) +
geom_bar(stat="identity", position="dodge", width=0.7) +
scale_y_continuous(labels=scales::label_number_si(accuracy = 1)) +
scale_fill_manual(values=cols) +
labs(x="", y="Value", title="Landscape Metrics", fill="Region: ") +
theme(axis.title.x = NULL) +
facet_wrap(~metric, scales = "free_y") +
theme_light(12) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size=11),
axis.text.y = element_text(angle = 0, vjust=0.1, size=11),
axis.title.y = element_text(size=12, face="italic", vjust=0.5),
axis.title.x = element_text(size=12, face="italic", vjust=-0.5),
plot.margin = unit(c(0.5,0.5,0.5,0.5),"cm"),
legend.position = "top",
legend.spacing.x = unit(0.5,"cm"),
text = element_text(family = "Arial")))
ggsave(f8b, file = "../../figures/Figure8B_landscape_metrics.png", dpi = 300) # adjust dpi accordingly
(arr <- ggarrange(f8b,f8a,nrow=2,ncol=1,labels=c("A","B"), common.legend = T))
ggsave(arr, file="../../figures/Figure8_Landscape_Patch_Metrics.png", dpi=300)
The 10-m aspen classification identified Xx more patch than reference images:
print("% Difference in number of patches across the Southern Rockies: ")
a <- df %>% filter(region=="SRME",metric=="Number of Patches",source=="Aspen10m")
b <- df %>% filter(region=="SRME",metric=="Number of Patches",source=="LFEVT")
((b$value-a$value)/b$value)*100
print("% Difference in number of patches across the White River NF: ")
a <- df %>% filter(region=="WRNF",metric=="Number of Patches",source=="Aspen10m")
b <- df %>% filter(region=="WRNF",metric=="Number of Patches",source=="LFEVT")
((b$value-a$value)/b$value)*100
print("~~~~~~~~~~~~~")
print("~~~~~~~~~~~~~")
print("% Difference in patch density: ")
a <- df %>% filter(metric=="Patch Density",source=="Aspen10m")
b <- df %>% filter(metric=="Patch Density",source=="LFEVT")
((b$value-a$value)/b$value)*100
print("% Difference in total area:")
a <- df %>% filter(metric=="Total area (km2)",source=="Aspen10m")
aa <- df %>% filter(metric=="Total area (km2)",source=="Aspen30m")
b <- df %>% filter(metric=="Total area (km2)",source=="LFEVT")
((b$value-a$value)/b$value)*100
((b$value-aa$value)/b$value)*100
rm(a,aa,b)
Figure S1: Quaking Aspen Phenology
Load the spatial block grid and phenology time-series summaries by spatial block for the Southern Rockies. Join the phenology summaries to their geometries.
# Load the grids, keep the elevation attribute and TreeMap sum (aspen area)
grid <- st_read('../../data/spatial/mod/boundaries/spatial_block_grid_50km2.gpkg',
quiet=T) %>%
select(grid_id,elevation_mn,treemap_sum,treemap_pct)
# Load the phenology by spatial block grid
phenology <- read_csv('../../data/tabular/mod/phenology/viirs_phenology_by_grid_in_aspen.csv',
show_col_types = FALSE) %>%
rename(fid = `system:index`) %>%
mutate(grid_id = str_sub(fid, -7),
year = as.integer(str_sub(fid, 1, 4))) %>%
select(-c(fid,label,count,.geo)) %>%
# Create DOY versions of the phenology metrics for plotting and statistical analysis
mutate(
season_length = Growing_Season_Length_1_median,
doy_midgreenup = yday(Date_Mid_Greenup_Phase_1_median),
doy_midsenescence = yday(Date_Mid_Senescence_Phase_1_median),
doy_green_dec = yday(Onset_Greenness_Decrease_1_median),
doy_green_inc = yday(Onset_Greenness_Increase_1_median),
doy_green_max = yday(Onset_Greenness_Maximum_1_median),
doy_green_min = yday(Onset_Greenness_Minimum_1_median)
) %>%
left_join(grid%>%as_tibble(), by="grid_id") %>%
select(grid_id,year,elevation_mn,season_length,doy_midgreenup,doy_midsenescence,
doy_green_dec,doy_green_inc,doy_green_max,doy_green_min)
glimpse(phenology)
Rows: 1,110
Columns: 10
$ grid_id <chr> "-233+82", "-234+80", "-234+81", "-234+82", "-234+83", "-234+84", "-234+85", "-234+86", "-234+87", "-…
$ year <int> 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013,…
$ elevation_mn <dbl> 2116.571, 2115.817, 2178.622, 2414.847, 2247.682, 2034.530, 1711.193, 2381.485, 2113.312, 2315.858, 2…
$ season_length <dbl> 200, 204, 130, 192, 196, 190, 186, 204, 182, 176, 186, 182, 192, 184, 190, 217, 212, 183, 183, 168, 1…
$ doy_midgreenup <dbl> 160, 166, 170, 160, 155, 159, 146, 153, 159, 164, 160, 157, 155, 155, 157, 145, 154, 161, 161, 158, 1…
$ doy_midsenescence <dbl> 287, 281, 267, 281, 280, 276, 281, 280, 267, 275, 280, 276, 280, 275, 276, 275, 286, 278, 276, 269, 2…
$ doy_green_dec <dbl> 254, 241, 263, 248, 248, 247, 241, 247, 237, 247, 248, 246, 249, 245, 244, 241, 250, 250, 247, 237, 2…
$ doy_green_inc <dbl> 125, 113, 142, 124, 117, 124, 119, 110, 122, 133, 126, 125, 119, 122, 121, 99, 108, 135, 132, 131, 13…
$ doy_green_max <dbl> 190, 190, 197, 193, 189, 188, 177, 190, 186, 198, 193, 186, 188, 184, 189, 186, 192, 187, 186, 183, 1…
$ doy_green_min <dbl> 322, 316, 271, 312, 314, 309, 320, 313, 297, 301, 312, 305, 311, 306, 311, 313, 320, 314, 313, 298, 3…
Spatial maps of the key phenology metrics.
# Calculate the median phenology metrics
phenology.md <- phenology %>%
group_by(grid_id) %>%
summarise(across(starts_with("doy"), median, na.rm=T))
# Join to the spatial data and pivot longer
grid.l <- grid %>%
left_join(phenology.md, by="grid_id") %>%
pivot_longer(cols = starts_with("doy"), names_to = "metric", values_to = "value") %>%
filter(treemap_pct > 0.05)
# Tidy the names for plotting
metric_names <- c(
doy_green_inc = "Onset Greenness Increase",
doy_midgreenup = "Mid Greenup Phase",
doy_green_max = "Onset Greenness Maximum",
doy_green_dec = "Onset Greenness Decrease",
doy_midsenescence = "Mid Senescence Phase",
doy_green_min = "Onset Greenness Minimum"
)
grid.l$metric <- factor(
grid.l$metric,
levels = ordered_metrics,
labels = metric_names
)
metrics <- unique(grid.l$metric)
plots <- list()
for (m in metrics){
gdf <- grid.l %>% filter(metric == m)
# Map
p <- ggplot() +
geom_sf(data=grid, fill=NA) +
geom_sf(data=gdf, aes(fill=value)) +
scale_fill_continuous(low = "lightgreen", high = "darkgreen",
labels = function(x) format(as.Date(as.numeric(x), origin="1970-01-01"), "%b %d")) +
geom_sf(data=srme,fill=NA,color="black",size=2.5) +
labs(fill="", title=m) +
theme_void(12) +
theme(legend.position = "right",
legend.text = element_text(angle = 45, size=9), # Ensure labels are not rotated
plot.title = element_text(hjust = 0.5, size=10)
) +
guides(fill = guide_colourbar(
barwidth = 0.5,
barheight = 8,
ticks = FALSE,
label.position = "right",
label.theme = element_text(size = 8))
)
print(p)
plots[[m]] <- p
}
Create a combined plot for four metrics.
fs1a <- ggarrange(plots[[1]], plots[[3]], plots[[2]], plots[[6]],
ncol = 2, nrow = 2, widths=c(1,1), heights=c(1,1),
align = "hv") +
labs(tag="A")
fs1a
Plot the relationship with elevation across metrics.
(fs1c <- ggplot(data=grid.l, aes(x=value,y=elevation_mn)) +
geom_smooth(method="lm",colour="gray40", fill="gray70", size=0.8) +
geom_point(size=0.8) +
facet_wrap(~metric, scales="free_x") +
theme_light(12) +
theme(plot.title = element_text(size=10),
axis.text = element_text(size=8),
axis.title = element_text(size=10),
strip.text = element_text(size=8)) +
labs(x="Day of Year",y="Elevation", tag="B"))
# Save out
ggsave(fs1c, file="../../figures/FigureS1C_Phenology_by_Elevation.png", dpi=300)
Merge the two plots.
(fs1 <- ggarrange(fs1a, fs1c, nrow=2, ncol=1, widths = c(0.6, 1), heights = c(1, 0.7)))
`geom_smooth()` using formula = 'y ~ x'
ggsave(fs1, file="../../figures/FigureS1_Phenology.png", dpi=300)
Saving 5.5 x 7.5 in image
# Reshape from wide to long format
phenology.l <- phenology %>%
pivot_longer(
cols = c(doy_midgreenup,doy_midsenescence,doy_green_dec,
doy_green_inc,doy_green_max,doy_green_min),
names_to = "metric",
values_to = "doy"
)
ordered_metrics <- c(
"doy_green_inc","doy_midgreenup","doy_green_max",
"doy_green_dec","doy_midsenescence","doy_green_min")
metric_names <- c(
doy_green_inc = "Onset Greenness Increase",
doy_midgreenup = "Mid Greenup Phase",
doy_green_max = "Onset Greenness Maximum",
doy_green_dec = "Onset Greenness Decrease",
doy_midsenescence = "Mid Senescence Phase",
doy_green_min = "Onset Greenness Minimum"
)
phenology.l$metric <- factor(
phenology.l$metric,
levels = ordered_metrics,
labels = metric_names
)
# Create the plot with facet_wrap
ggplot(phenology.l, aes(x=factor(year), y=doy)) +
geom_boxplot() +
facet_wrap(~ metric, scales = "fixed") +
theme_minimal() +
labs(title = "DOY Phenology Metrics (2013-2022)",
x = "Year",
y = "Day of Year",
fill = "Year") +
theme_light() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Stacked box plots
ggplot(phenology.l, aes(x=factor(year), y=doy, fill=metric)) +
geom_boxplot(width=1) +
theme_minimal() +
labs(x = "Year",
y = "Day of Year",
fill = "Metric") +
theme_light() +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "top")
# Spatial map(s) of key phenological metrics
breaks_dates <- as.Date(c("2020-01-01", "2021-01-01", "2022-01-01"))
breaks_numeric <- as.numeric(breaks_dates) # Convert to numeric (days since 1970-01-01, by default)
labels_dates <- format(breaks_dates, "%Y-%m-%d")
p1 <- ggplot(data=blocks) +
geom_sf(aes(fill=Maturity_1)) +
scale_fill_continuous(low = "lightgreen", high = "darkgreen",
labels = function(x) format(as.Date(as.numeric(x), origin="1970-01-01"), "%B %d")) +
geom_sf(data=srme,fill=NA,color="black",size=2.5) +
labs(fill="Maturity") +
guides(fill = guide_colourbar(barwidth = 0.5, barheight = 5.5, ticks=F,
label.position = "right", title.position = "top",
label.theme = element_text(angle = 0, size = 8))) +
theme_void()
p2 <- ggplot(data=blocks) +
geom_sf(aes(fill=Dormancy_1)) +
scale_fill_continuous(low = "#ffffd4", high = "#993404",
labels = function(x) format(as.Date(as.numeric(x), origin="1970-01-01"), "%B %d")) +
geom_sf(data=srme,fill=NA,color="black",size=2.5) +
labs(fill="Dormancy") +
guides(fill = guide_colourbar(barwidth = 0.5, barheight = 5.5, ticks=F,
label.position = "right", title.position = "top",
label.theme = element_text(angle = 0, size = 8))) +
theme_void()
ggarrange(p1,p2)
# # Add a statistics column for legend (could do this for F1 too ...)
# blocks.m <- reshape2::melt(
# blocks, id.vars = c("id","elevation_mn"),
# measure.vars = c("Greenup_1","MidGreenup_1","Maturity_1","Peak_1",
# "MidGreendown_1","Senescence_1","Dormancy_1"), variable.name = "metric"
# )
# Add a statistics column for legend (could do this for F1 too ...)
blocks.m <- reshape2::melt(
blocks, id.vars = c("id","elevation_mn"),
measure.vars = c("Greenup_1","Maturity_1","Peak_1","Dormancy_1"), variable.name = "metric"
) %>%
mutate(metric = as.character(metric),
metric = as.factor(recode(metric,
"Greenup_1" = "Greenup",
"Maturity_1" = "Maturity",
"Peak_1" = "Peak Greenness",
"Dormancy_1" = "Dormancy")))
blocks.m$metric <- factor(blocks.m$metric, levels=c('Greenup', 'Maturity', 'Peak Greenness', 'Dormancy'))
head(blocks.m)
# Dot plot of phenology metric by elevation for all blocks
(p5 <- ggplot(data=blocks.m, aes(x=value,y=elevation_mn)) +
geom_smooth(method="lm",colour="gray40", fill="gray70", size=0.8) +
geom_point(size=1.2) +
facet_wrap(~metric, scales="free_x") +
theme_light(12) +
labs(x="Date",y="Elevation"))
(p6 <- ggplot(data=blocks.m, aes(x=value,y=elevation_mn,color=metric)) +
geom_point(size=1.2) +
theme_light(12))
ROC, F1, MCC:
accmeas <- read_csv('../../data/tabular/mod/results/accmeas_prop.csv')
# Calculate the averages for each cutoff value across model runs
accmeas.mn <- accmeas %>%
mutate(cutoff = as.character(cutoff)) %>%
group_by(cutoff) %>%
summarise(
prMn = mean(prSize,na.rm=T), # size of presence data (mean)
prSd = sd(prSize,na.rm=T), # size of presence data (sd)
bgMn = mean(bgSize,na.rm=T), # size of background data (mean)
bgSd = sd(bgSize,na.rm=T), # size of background data (sd)
fprMn = mean(fpr,na.rm=T), # False Positive Rate (mean)
fprSd = sd(fpr,na.rm=T),
tprMn = mean(tpr,na.rm=T), # True Positive Rate (mean)
tprSd = sd(tpr,na.rm=T),
precision = mean(precision,na.rm=T), # precision (mean)
precisionSd = sd(precision,na.rm=T),
recall = mean(recall,na.rm=T), # recall (mean)
recallSd = sd(recall,na.rm=T),
f1 = mean(f1,na.rm=T), # F1 score (mean)
f1Sd = sd(f1,na.rm=T),
gmean = mean(gmean,na.rm=T), # Geometric Mean (mean)
gmeanSd = sd(gmean,na.rm=T),
mcc = mean(mcc,na.rm=T), # Matthew's Correlation Coefficient (mean)
mccSd = sd(mcc,na.rm=T),
accuracy = mean(accuracy,na.rm=T), # Overall accuracy (mean)
accuracySd = sd(accuracy,na.rm=T)
) %>%
ungroup() %>%
mutate(cutoff = round(as.double(cutoff),3))
# Calculate optimum threshold based on F1 statistic
cutoffOptMn = accmeas.mn[which.max(accmeas.mn$f1),]$cutoff
precisionOptF1Mn = accmeas.mn[which.max(accmeas.mn$f1),]$precision
recallOptF1Mn = accmeas.mn[which.max(accmeas.mn$f1),]$recall
fprOptMn = accmeas.mn[which.max(accmeas.mn$f1),]$fprMn
tprOptMn = accmeas.mn[which.max(accmeas.mn$f1),]$tprMn
max_f1 <- max(accmeas.mn$f1, na.rm = TRUE)
# ROC Curve and label the AUC value (approximation)
# Get the AUC approx
# Ensure data is sorted by FPR
accmeas_mn_sorted <- accmeas.mn[order(accmeas.mn$fprMn), ]
# Compute AUC using trapz function in 'pracma'
auc_avg_approx <- trapz(accmeas_mn_sorted$fprMn, accmeas_mn_sorted$tprMn)
fs2a <- ggplot(data=accmeas) +
geom_line(aes(x=fpr,y=tpr,color=factor(model)),linewidth=0.4) +
scale_color_viridis_d(option="turbo") +
geom_line(data=accmeas.mn, aes(x = fprMn, y = tprMn), color = "black", size = 0.8) + # ROC curve average
scale_x_continuous(limits=c(0,1)) + # Set x axis limits from 0 to 1
scale_y_continuous(limits=c(0,1)) +
labs(x='False Positive Rate', y='True Positive Rate', tag="A") +
geom_point(aes(x = fprOptMn, y = tprOptMn),
color = "red", size = 3, shape = 19) + # optimal threshold point
geom_text(aes(x=fprOptMn, y=tprOptMn),
label = paste0('Optimal threshold: ', round(cutoffOptMn,3),'\nApprox. AUC: ', round(auc_avg_approx,3)),
nudge_x = 0.38, nudge_y = -0.08, size = 3.5) +
coord_fixed(ratio = 1) + # to keep the x and y axes scales same
geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "black") + # diagonal
theme_light() +
theme(legend.position = "none",
axis.title.y = element_text(size=11,face="italic",
margin = unit(c(0,0.4,0,0), "cm")),
axis.title.x = element_text(size=11,face="italic",
margin = unit(c(0.4,0,0,0), "cm")),
axis.text = element_text(size=10),
strip.text.x = element_text(size = 11))
fs2b <- ggplot(data=accmeas) +
geom_line(aes(x=cutoff,y=f1,color=factor(model)),linewidth=0.4) +
scale_color_viridis_d(option="turbo") +
geom_line(data=accmeas.mn, aes(x=cutoff, y = f1), color = "black", size = 0.8) + # ROC curve average
# geom_vline(xintercept=0.424, linetype="dashed", color="black") +
geom_point(aes(x=cutoffOptMn, y=max_f1),
color = "red", size = 3, shape = 19) +
geom_text(aes(x=cutoffOptMn, y=max_f1),
label = paste0('Optimal threshold: ', round(cutoffOptMn,3)),
nudge_x = 0.0, nudge_y = 0.08, size = 3.5) +
scale_x_continuous(limits=c(0,1)) + # Set x axis limits from 0 to 1
scale_y_continuous(limits=c(0,1)) +
labs(x='Threshold', y='F1 Score', tag="B") +
coord_fixed(ratio = 1) + # to keep the x and y axes scales same
theme_light() +
theme(legend.position = "none",
axis.title.y = element_text(size=11,face="italic",
margin = unit(c(0,0.4,0,0), "cm")),
axis.title.x = element_text(size=11,face="italic",
margin = unit(c(0.4,0,0,0), "cm")),
axis.text = element_text(size=10),
strip.text.x = element_text(size = 11))
(fs2 <- ggarrange(fs2a,fs2b))
ggsave(fs2, file="../../figures/FigureS2_AUC_F1Max.png", dpi=300)
rm(fs2,fs2a,fs2b,accmeas_mn_sorted,auc_avg_approx)
Table S1: Performance metrics for the optimum cutoff value
Create a pretty table.
library(flextable, quietly = T)
# Get the best F1 score for each model run
accmeas.best <- accmeas %>%
mutate(model = as.factor(model)) %>%
group_by(model) %>%
top_n(1,f1) %>%
ungroup() %>%
mutate(model_iter = row_number()) %>%
select(model,prSize,bgSize,tpr,precision,recall,f1,cutoff)
head(accmeas.best,10)
# Fix names and add units
names(accmeas.best) <- c("Model Seed",
"# of Presence",
"# of Background",
"True Positive Rate",
"Precision",
"Recall",
"F1-Score",
"Threshold")
# Set font name for table
fontname <- "Times New Roman"
# Create the flextable
(ft1 <- flextable(accmeas.best) %>%
font(fontname = fontname, part = "all") %>%
autofit() %>% fit_to_width(6.5))
print(ft1, preview = "docx")
# Write to a CSV
write_csv(accmeas.best, "../../figures/TableS1_Accuracy_MaxF1.csv")
Clean up!